Chapter 17 Exercises - Reproducing the the ‘serial dilution assay’

My goal here is to fit the serial dilution model model and perform basic inferential analysis as demonstrated in Chapter 17. The original publication by Gelman et al. is also available1. Unfortunately, I could not find the original data analyzed and was only able to copy the standards data from the book.

Setup

library(rstan)
library(tidybayes)
library(tidyverse)

options(mc.cores = parallel::detectCores())
rstan_options(auto_write = TRUE)

theme_set(
  theme_classic() +
    theme(
      panel.grid.major = element_line(),
      strip.background = element_blank()
    )
)

STAN_RED <- "#B2171D"

Data copied from the book Figure 19.3 on page 473.

dilution_standards_data <- tibble::tribble(
  ~conc, ~dilution, ~y,
  0.64, 1, c(101.8, 121.4),
  0.32, 1 / 2, c(105.2, 114.1),
  0.16, 1 / 4, c(92.7, 93.3),
  0.08, 1 / 8, c(72.4, 61.1),
  0.04, 1 / 16, c(57.6, 50.0),
  0.02, 1 / 32, c(38.5, 35.1),
  0.01, 1 / 64, c(26.6, 25.0),
  0, 0, c(14.7, 14.2),
) %>%
  mutate(rep = purrr::map(conc, ~ c("a", "b"))) %>%
  unnest(c(y, rep))
head(dilution_standards_data)
#> # A tibble: 6 × 4
#>    conc dilution     y rep  
#>   <dbl>    <dbl> <dbl> <chr>
#> 1  0.64     1    102.  a    
#> 2  0.64     1    121.  b    
#> 3  0.32     0.5  105.  a    
#> 4  0.32     0.5  114.  b    
#> 5  0.16     0.25  92.7 a    
#> 6  0.16     0.25  93.3 b

The following plot shows the two standard dilution curves. They are quite similar.

dilution_standards_data %>%
  ggplot(aes(x = conc, y = y, color = rep)) +
  geom_line(alpha = 0.5, linetype = 2) +
  geom_point(alpha = 0.8) +
  scale_x_continuous(expand = expansion(c(0, 0.02)), limits = c(0, NA)) +
  scale_y_continuous(expand = expansion(c(0, 0.02)), limits = c(0, NA)) +
  scale_color_brewer(type = "qual", palette = "Set1")

Modeling

Model specification

The Stan model is quite simple, but I ran into some problems fitting it, discussed below. The centrality and variance of the likelihood are calculated separately as g and tau so they can be used in the model and generated quantities block without duplicating the code. The log_lik is calculated so that PSIS-LOO cross validation could be conducted if desired.

dilution_model_file <- here::here("models", "serial-dilution.stan")
writeLines(readLines(dilution_model_file))
#> data {
#>   int<lower=0> N;  // number of data points
#>   int<lower=0> A;  // constant used in model of measurement error
#>   vector<lower=0>[N] x;  // concentration values
#>   vector<lower=0>[N] y;  // observed color intensity
#> }
#> 
#> parameters {
#>   vector<lower=0>[4] beta;
#>   real<lower=0,upper=1> alpha;
#>   real<lower=0> sigma;
#> }
#> 
#> transformed parameters {
#>   vector<lower=0>[N] g;
#>   vector<lower=0>[N] tau;
#> 
#>   for (i in 1:N) {
#>     g[i] = beta[1] + beta[2] / (1 + (x[i] / beta[3]) ^ (-beta[4]));
#>     tau[i] = ((g[i] / A) ^ (2.0 * alpha)) * (sigma ^ 2.0);
#>   }
#> }
#> 
#> model {
#>   // Priors
#>   alpha ~ beta(1, 1);
#>   beta[1] ~ normal(10, 2.5);
#>   beta[2] ~ normal(100, 5);
#>   beta[3] ~ normal(0, 1);
#>   beta[4] ~ normal(0, 2.5);
#>   sigma ~ normal(0, 2.5);
#> 
#>   // Likelihood
#>   for (i in 1:N) {
#>     y[i] ~ normal(g[i], tau[i]);
#>   }
#> }
#> 
#> generated quantities {
#>   vector[N] ypred;
#>   vector[N] log_lik;
#> 
#>   for (i in 1:N) {
#>     ypred[i] = normal_rng(g[i], tau[i]);
#>     log_lik[i] = normal_lpdf(y[i] | g[i], tau[i]);
#>   }
#> }

Sampling

As mentioned above, there were some problems with fitting the model. The main problem was that the \(\beta\) parameters are on wildly different scales, such as \(\beta_2\) is around 100 while \(\beta_3\) is less than 0.1. Therefore, in order for the model to fit correctly, I needed to assign different prior distributions for each parameter.

model_data <- list(
  N = nrow(dilution_standards_data),
  A = 30,
  x = dilution_standards_data$conc,
  y = dilution_standards_data$y
)


dilution_model <- stan(
  dilution_model_file,
  model_name = "serial-dilution",
  data = model_data
)
#> Running /Library/Frameworks/R.framework/Resources/bin/R CMD SHLIB \
#>   foo.c
#> clang -mmacosx-version-min=10.13 -I"/Library/Frameworks/R.framework/Resources/include" -DNDEBUG   -I"/Users/admin/Library/Application Support/renv/cache/v5/R-4.1/x86_64-apple-darwin17.0/Rcpp/1.0.7/dab19adae4440ae55aa8a9d238b246bb/Rcpp/include/"  -I"/Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/RcppEigen/include/"  -I"/Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/RcppEigen/include/unsupported"  -I"/Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/BH/include" -I"/Users/admin/Library/Application Support/renv/cache/v5/R-4.1/x86_64-apple-darwin17.0/StanHeaders/2.21.0-7/0459d4dd7a8c239be18469a30c23dd4b/StanHeaders/include/src/"  -I"/Users/admin/Library/Application Support/renv/cache/v5/R-4.1/x86_64-apple-darwin17.0/StanHeaders/2.21.0-7/0459d4dd7a8c239be18469a30c23dd4b/StanHeaders/include/"  -I"/Users/admin/Library/Application Support/renv/cache/v5/R-4.1/x86_64-apple-darwin17.0/RcppParallel/5.1.4/0325b5be38a02d192828027983c7b470/RcppParallel/include/"  -I"/Users/admin/Library/Application Support/renv/cache/v5/R-4.1/x86_64-apple-darwin17.0/rstan/2.21.2/52772d81aa532a6331fd535701882c12/rstan/include" -DEIGEN_NO_DEBUG  -DBOOST_DISABLE_ASSERTS  -DBOOST_PENDING_INTEGER_LOG2_HPP  -DSTAN_THREADS  -DBOOST_NO_AUTO_PTR  -include '/Users/admin/Library/Application Support/renv/cache/v5/R-4.1/x86_64-apple-darwin17.0/StanHeaders/2.21.0-7/0459d4dd7a8c239be18469a30c23dd4b/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp'  -D_REENTRANT -DRCPP_PARALLEL_USE_TBB=1   -I/usr/local/include   -fPIC  -Wall -g -O2  -c foo.c -o foo.o
#> In file included from <built-in>:1:
#> In file included from /Users/admin/Library/Application Support/renv/cache/v5/R-4.1/x86_64-apple-darwin17.0/StanHeaders/2.21.0-7/0459d4dd7a8c239be18469a30c23dd4b/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
#> In file included from /Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/RcppEigen/include/Eigen/Dense:1:
#> In file included from /Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/RcppEigen/include/Eigen/Core:88:
#> /Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:1: error: unknown type name 'namespace'
#> namespace Eigen {
#> ^
#> /Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/RcppEigen/include/Eigen/src/Core/util/Macros.h:628:16: error: expected ';' after top level declarator
#> namespace Eigen {
#>                ^
#>                ;
#> In file included from <built-in>:1:
#> In file included from /Users/admin/Library/Application Support/renv/cache/v5/R-4.1/x86_64-apple-darwin17.0/StanHeaders/2.21.0-7/0459d4dd7a8c239be18469a30c23dd4b/StanHeaders/include/stan/math/prim/mat/fun/Eigen.hpp:13:
#> In file included from /Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/RcppEigen/include/Eigen/Dense:1:
#> /Users/admin/Developer/Python/bayesian-data-analysis-course/renv/library/R-4.1/x86_64-apple-darwin17.0/RcppEigen/include/Eigen/Core:96:10: fatal error: 'complex' file not found
#> #include <complex>
#>          ^~~~~~~~~
#> 3 errors generated.
#> make: *** [foo.o] Error 1

Posterior distributions

rstan::stan_trace(dilution_model, pars = "beta", ncol=1) +
  scale_x_continuous(expand = expansion(c(0, 0))) +
  scale_y_continuous(expand = expansion(c(0.02, 0.02)))

print(dilution_model, pars=c("beta", "sigma", "alpha"))
#> Inference for Stan model: serial-dilution.
#> 4 chains, each with iter=2000; warmup=1000; thin=1; 
#> post-warmup draws per chain=1000, total post-warmup draws=4000.
#> 
#>           mean se_mean   sd  2.5%   25%    50%    75%  97.5% n_eff
#> beta[1]  14.34    0.02 0.56 13.09 14.08  14.36  14.63  15.44  1060
#> beta[2] 102.64    0.10 4.41 94.15 99.60 102.62 105.61 111.35  1807
#> beta[3]   0.06    0.00 0.01  0.05  0.06   0.06   0.07   0.08  1353
#> beta[4]   1.13    0.00 0.08  0.98  1.07   1.12   1.18   1.31  1768
#> sigma     1.34    0.01 0.23  1.01  1.18   1.30   1.46   1.90  1114
#> alpha     0.71    0.00 0.17  0.34  0.60   0.72   0.84   0.97  1388
#>         Rhat
#> beta[1]    1
#> beta[2]    1
#> beta[3]    1
#> beta[4]    1
#> sigma      1
#> alpha      1
#> 
#> Samples were drawn using NUTS(diag_e) at Tue Jan 11 07:04:18 2022.
#> For each parameter, n_eff is a crude measure of effective sample size,
#> and Rhat is the potential scale reduction factor on split chains (at 
#> convergence, Rhat=1).
rstan::stan_dens(
  dilution_model, pars = c("beta", "sigma", "alpha"), 
  separate_chains=TRUE
) +
  scale_x_continuous(expand = expansion(c(0, 0))) +
  scale_y_continuous(expand = expansion(c(0, 0.02)))

Posterior predictive check

Below is a plot of the posterior predictive distributions of the model on the original data. The simulated curves visually appear to correspond well with the observed data indicating the model has good fit.

dilution_post_pred <- rstan::extract(dilution_model, "ypred")$ypred %>%
  as.data.frame() %>%
  as_tibble() %>%
  set_names(seq(1, ncol(.))) %>%
  mutate(draw = 1:n()) %>%
  pivot_longer(-c(draw), names_to = "idx") %>%
  left_join(
    dilution_standards_data %>% mutate(idx = as.character(1:n())),
    by = "idx"
  )
plt_n_draws <- 100
plt_draws <- sample(1:max(dilution_post_pred$draw), plt_n_draws)
dilution_post_pred %>%
  filter(draw %in% !!plt_draws) %>%
  ggplot(aes(x = conc, y = value)) +
  facet_wrap(vars(rep), nrow = 1) +
  geom_line(aes(group = draw), alpha = 0.1) +
  geom_line(
    aes(y = y, group = rep),
    data = dilution_standards_data,
    color = STAN_RED
  ) +
  geom_point(aes(y = y), data = dilution_standards_data, color = STAN_RED) +
  scale_x_continuous(expand = expansion(c(0, 0))) +
  scale_y_continuous(expand = expansion(c(0.02, 0.02)))


Session info

#> R version 4.1.2 (2021-11-01)
#> Platform: x86_64-apple-darwin17.0 (64-bit)
#> Running under: macOS Big Sur 10.16
#> 
#> Matrix products: default
#> BLAS:   /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRblas.0.dylib
#> LAPACK: /Library/Frameworks/R.framework/Versions/4.1/Resources/lib/libRlapack.dylib
#> 
#> locale:
#> [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
#> 
#> attached base packages:
#> [1] stats     graphics  grDevices datasets  utils     methods  
#> [7] base     
#> 
#> other attached packages:
#>  [1] forcats_0.5.1        stringr_1.4.0        dplyr_1.0.7         
#>  [4] purrr_0.3.4          readr_2.0.1          tidyr_1.1.3         
#>  [7] tibble_3.1.3         tidyverse_1.3.1      tidybayes_3.0.1     
#> [10] rstan_2.21.2         ggplot2_3.3.5        StanHeaders_2.21.0-7
#> 
#> loaded via a namespace (and not attached):
#>  [1] fs_1.5.0             matrixStats_0.61.0   lubridate_1.7.10    
#>  [4] RColorBrewer_1.1-2   httr_1.4.2           rprojroot_2.0.2     
#>  [7] tensorA_0.36.2       tools_4.1.2          backports_1.2.1     
#> [10] bslib_0.2.5.1        utf8_1.2.2           R6_2.5.0            
#> [13] DBI_1.1.1            colorspace_2.0-2     ggdist_3.0.0        
#> [16] withr_2.4.2          tidyselect_1.1.1     gridExtra_2.3       
#> [19] prettyunits_1.1.1    processx_3.5.2       downlit_0.2.1       
#> [22] curl_4.3.2           compiler_4.1.2       rvest_1.0.1         
#> [25] cli_3.0.1            arrayhelpers_1.1-0   xml2_1.3.2          
#> [28] labeling_0.4.2       posterior_1.1.0      sass_0.4.0          
#> [31] scales_1.1.1         checkmate_2.0.0      callr_3.7.0         
#> [34] digest_0.6.27        rmarkdown_2.10       pkgconfig_2.0.3     
#> [37] htmltools_0.5.1.1    highr_0.9            dbplyr_2.1.1        
#> [40] rlang_0.4.11         readxl_1.3.1         rstudioapi_0.13     
#> [43] jquerylib_0.1.4      farver_2.1.0         generics_0.1.0      
#> [46] svUnit_1.0.6         jsonlite_1.7.2       distill_1.2         
#> [49] distributional_0.2.2 inline_0.3.19        magrittr_2.0.1      
#> [52] loo_2.4.1            Rcpp_1.0.7           munsell_0.5.0       
#> [55] fansi_0.5.0          abind_1.4-5          lifecycle_1.0.0     
#> [58] stringi_1.7.3        yaml_2.2.1           pkgbuild_1.2.0      
#> [61] grid_4.1.2           parallel_4.1.2       crayon_1.4.1        
#> [64] lattice_0.20-45      haven_2.4.3          hms_1.1.0           
#> [67] knitr_1.33           ps_1.6.0             pillar_1.6.2        
#> [70] codetools_0.2-18     clisymbols_1.2.0     stats4_4.1.2        
#> [73] reprex_2.0.1         glue_1.4.2           evaluate_0.14       
#> [76] V8_3.4.2             renv_0.14.0          RcppParallel_5.1.4  
#> [79] modelr_0.1.8         vctrs_0.3.8          tzdb_0.1.2          
#> [82] cellranger_1.1.0     gtable_0.3.0         assertthat_0.2.1    
#> [85] xfun_0.25            broom_0.7.9          coda_0.19-4         
#> [88] ellipsis_0.3.2       here_1.0.1

  1. Gelman A, Chew GL, Shnaidman M. Bayesian analysis of serial dilution assays. Biometrics. 2004 Jun;60(2):407-17. doi: 10.1111/j.0006-341X.2004.00185.x. PMID: 15180666. https://pubmed.ncbi.nlm.nih.gov/15180666/↩︎